********************************************************************************
** 	TITLE:		sk2020_skpes_contact_networks                                 ** 	
**	AUTHOR:	    Philippe Mongrain                                             **
**	DATA:       Mongrain_Module                                               **
**  DATE:	    October 2022 						                          **	
**	VERSION:	Stata 16					                                  **
********************************************************************************

* Interpret every command below under Stata 16.0 rules
version 16.0

* Logging: silently close any log that is still open, then start a new one
* (an existing log file of the same name is overwritten)
cap log close
log using "sk2020_skpes_contact_networks", replace

* Load the source dataset, replacing whatever is currently in memory
use "SK_2020.dta", clear


***************************
** INDEPENDENT VARIABLES **
***************************

* Time (number of days between the survey response and election day)

* Remove the "time" variable that ships with the dataset so it can be rebuilt
* below (this command errors if SK_2020.dta contains no such variable).
drop time

* Survey date: dofc() converts a datetime (%tc) value into a daily date.
* NOTE(review): assumes EndDate is stored as a %tc datetime -- confirm.
gen surveydate = dofc(EndDate)

format %tdMon_DD,_CCYY surveydate

* Election day written as a YYYYMMDD integer. It is stored as long because the
* default float type cannot represent every 8-digit integer exactly; with a
* float, an odd-numbered date would be rounded before being parsed below.
gen long edate = 20201026

gen electiondate = date(string(edate,"%8.0f"),"YMD")

format %tdMon_DD,_CCYY electiondate

* Positive values mean the survey was completed before election day
gen time = electiondate - surveydate

* Age: 101 - cps_yob, i.e. 2020 - (1919 + cps_yob)
gen age = 101 - cps_yob

* Gender dummy: code 3 becomes missing, code 2 becomes 0,
* every other value of cps_gender is carried over unchanged
gen male = cond(cps_gender == 3, ., cond(cps_gender == 2, 0, cps_gender))

* Education (copied as is)
gen education = cps_edu

* Interest in the election (-99 is treated as missing)
gen interest = cps_intelection_1 if cps_intelection_1 != -99

* News attentiveness (copied as is)
gen news = cps_newstime

* Party identification (provincial)
*
* Three-category status built directly from the raw cps_provpid codes:
*   281                        -> 3 "Winner PID"
*   299                        -> 2 "No PID"
*   -99 or any missing value   -> missing
*   every other code           -> 1 "Loser PID"
* The recode reads from cps_provpid rather than overwriting a copy in place,
* so raw codes can never be confused with the 1/2/3 output codes, and
* missing() is used so that extended missing values (.a-.z) are not
* classified as "Loser PID" (a plain "!= ." test lets them through).

gen pidstatus_whole = .

replace pidstatus_whole = 1 if !missing(cps_provpid) & !inlist(cps_provpid, -99, 281, 299)
replace pidstatus_whole = 2 if cps_provpid == 299
replace pidstatus_whole = 3 if cps_provpid == 281

label define pidstatus 1 "Loser PID" 2 "No PID" 3 "Winner PID" 
label values pidstatus_whole pidstatus

* PID strength (copied as is; per the labels below, 1 is the strongest
* category and 3 the weakest)

gen strength = cps_provpidstr

* Party identification scale (provincial)
* Seven-point scale running from strong loser PID (1) to strong winner PID (7).
* Respondents with a PID but a strength value outside 1-3 stay missing.

gen pidscale_whole = .

replace pidscale_whole = 1 if pidstatus_whole == 1 & strength == 1
replace pidscale_whole = 2 if pidstatus_whole == 1 & strength == 2
replace pidscale_whole = 3 if pidstatus_whole == 1 & strength == 3
replace pidscale_whole = 4 if pidstatus_whole == 2
replace pidscale_whole = 5 if pidstatus_whole == 3 & strength == 3
replace pidscale_whole = 6 if pidstatus_whole == 3 & strength == 2
replace pidscale_whole = 7 if pidstatus_whole == 3 & strength == 1

label define pidscale 1 "Strong loser PID" 2 "Moderate loser PID" 3 "Weak loser PID" 4 "No PID" 5 "Weak winner PID" 6 "Moderate winner PID" 7 "Strong winner PID" 
label values pidscale_whole pidscale

* Number of family members and relatives
* (-99 becomes missing; all other codes are shifted down by one)
gen relatives = MongrainSK2 - 1 if MongrainSK2 != -99

* Number of close friends (same treatment as relatives)
gen friends = pes_network - 1 if pes_network != -99

* Network size

* Relatives on four points: codes 2-5 are pooled into 2, codes above 5 into 3,
* lower codes are kept unchanged
gen relatives_4pts = relatives
replace relatives_4pts = 2 if inlist(relatives, 2, 3, 4, 5)
replace relatives_4pts = 3 if relatives > 5 & relatives != .

gen size = relatives_4pts + friends

* Relatives on three points: 1 -> 0, 2-5 -> 1, above 5 -> 2
gen relatives_3pts = relatives
replace relatives_3pts = 0 if relatives == 1
replace relatives_3pts = 1 if inrange(relatives, 2, 5)
replace relatives_3pts = 2 if relatives > 5 & relatives != .

* Friends on three points: codes 1, 2 and 3 each move down by one
gen friends_3pts = friends
replace friends_3pts = friends - 1 if inlist(friends, 1, 2, 3)

* Combined three-point size: the sum is collapsed as 1 -> 0, 2/3 -> 1, 4 -> 2
* (recode applies all rules to the original values at once)
gen size_3pts = relatives_3pts + friends_3pts
recode size_3pts (1 = 0) (2 3 = 1) (4 = 2)

* Discussion with people at work or school
* (-99 is set to missing before rescaling, as for the family/friends item
* below; without this step a -99 would turn into -100 and distort the
* summed and row-max discussion measures)

gen discussion_work_school = MongrainSK4

replace discussion_work_school = . if discussion_work_school == -99

replace discussion_work_school = discussion_work_school - 1

* Three-point version: code 3 is merged into code 2
gen discussion_work_school_3pts = discussion_work_school 

replace discussion_work_school_3pts = 2 if discussion_work_school == 3

* Discussion with family and friends

gen discussion_family_friends = pes_discussfriends

replace discussion_family_friends = . if discussion_family_friends == -99

replace discussion_family_friends = discussion_family_friends - 1

* Three-point version: code 3 is merged into code 2
gen discussion_family_friends_3pts = discussion_family_friends

replace discussion_family_friends_3pts = 2 if discussion_family_friends == 3

* Political discussion frequency
* The additive index is missing if either component is missing, whereas
* rowmax() ignores missing components and is missing only if both are.

gen discussion = discussion_work_school + discussion_family_friends

egen discussion_3pts = rowmax(discussion_work_school_3pts discussion_family_friends_3pts)

* Disagreement

gen disagreement = pes_disagreefriends

replace disagreement = . if disagreement == -99

replace disagreement = disagreement - 1

* Respondents whose friends variable equals 0 are assigned disagreement = 0
replace disagreement = 0 if friends == 0

* Three-point version: code 3 is merged into code 2
gen disagreement_3pts = disagreement

replace disagreement_3pts = 2 if disagreement == 3


******************************************
** PROVINCIAL-LEVEL EXPECTATIONS: SEATS **
******************************************

* Raw seat forecast
* NOTE(review): code 1 is presumably the party that won the most seats --
* confirm against the codebook.
gen forecast_whole = MongrainSK1

* Correct-forecast dummy: 1 if forecast_whole == 1, 0 for any other answer.
* Missing forecasts are kept missing: in Stata a missing value satisfies
* "!= 1", so an unguarded "0 if forecast_whole != 1" would count respondents
* with no forecast as incorrect.
gen correct_whole_d = (forecast_whole == 1) if !missing(forecast_whole)


**********
** SAVE **
**********

* Write the recoded dataset to disk, overwriting any existing file of that name.
* The variable labels assigned in the SUMMARY STATISTICS section are applied
* after this point and are therefore not stored in the saved file.
save "sk2020_skpes.dta", replace


*******************
** MAIN ANALYSES **
*******************

* Regression analysis and percentage of correct and incorrect forecasts

* Logistic regression of a correct forecast (-logistic- displays odds ratios;
* the table below lists the coefficients with significance stars)
logistic correct_whole_d discussion disagreement size pidscale_whole interest news education age i.male
estimates table, star(.05 .01 .001)

* Predicted probability of a correct forecast at the two ends of the PID scale,
* all other covariates held at their means (adjusted predictions at the means,
* not marginal effects)
margins, at(pidscale_whole=1) atmeans predict(pr)
margins, at(pidscale_whole=7) atmeans predict(pr)

* Average marginal effects of every covariate on the predicted probability.
* The model above is still the active estimation result (margins without the
* post option leaves e() untouched), so the identical model is not re-fitted.
margins, dydx(*) predict(pr)

* Distribution of correct and incorrect forecasts within the estimation sample
tab correct_whole_d if e(sample) == 1


************************
** SUMMARY STATISTICS **
************************

* Variable labels shown in the summary table (esttab is called with -label-)
lab var correct_whole_d "Most seats"
lab var discussion "Discussion"
lab var disagreement "Disagreement"
lab var size "Network size"
lab var pidscale_whole "PID"
lab var interest "Interest"
lab var news "News attentiveness"
lab var education "Education"
lab var age "Age"
lab var male "Gender (male = 1)"

* Covariates that enter both the model and the table under the same name
local shared discussion disagreement size pidscale_whole interest news age

* Fit the model silently; it is only needed to define e(sample)
quietly logistic correct_whole_d `shared' i.male education

* Descriptive statistics restricted to the estimation sample
estpost tabstat correct_whole_d `shared' male education if e(sample) == 1, ///
    statistics(n mean p50 sd min max) columns(statistics)

* Export the table to LaTeX
esttab using "sk2020_skpes_summary_provincial.tex", ///
    substitute("\begin{table}[htbp]" "\begin{table}[H]") ///
    cells("count(label(N) fmt(%9.0fc)) mean(fmt(%5.2f) label(Mean)) p50(fmt(%5.1f) label(Median)) sd(fmt(%5.1f) label(Std. dev.)) min(fmt(%5.1f) label(Min)) max(fmt(%5.1f) label(Max))") ///
    label width(\textwidth) nomtitle nonumber noobs booktabs ///
    title("Summary statistics -- 2020 Saskatchewan provincial election (SKPES), provincial level") ///
    replace

* Drop the stored estimates and close the log opened at the top of the script
eststo clear

log close